# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hysop import __DEFAULT_NUMBA_TARGET__
from hysop.core.arrays.array import Array
try:
import numba as nb
from numba import prange
HAS_NUMBA = True
except ImportError:
HAS_NUMBA = False
[docs]
def make_numba_signature(*args, **kwds):
raise_on_cl_array = kwds.pop("raise_on_cl_array", True)
if kwds:
msg = "Unknown kwds {}.".forma(kwds.keys())
raise RuntimeError(kwds)
dtype_to_ntype = {
int: nb.int64,
float: nb.float64,
np.int8: nb.int8,
np.int16: nb.int16,
np.int32: nb.int32,
np.int64: nb.int64,
np.uint8: nb.uint8,
np.uint16: nb.uint16,
np.uint32: nb.uint32,
np.uint64: nb.uint64,
np.float32: nb.float32,
np.float64: nb.float64,
np.complex64: nb.complex64,
np.complex128: nb.complex128,
}
sizes = ("m", "n", "p", "q", "r", "s") + tuple(f"n{i}" for i in range(10))
registered_sizes = {}
def format_shape(*shape):
res = "("
for i, s in enumerate(shape):
if s in registered_sizes:
sc = registered_sizes[s]
else:
sc = sizes[len(registered_sizes)]
registered_sizes[s] = sc
res += sc
if i != len(shape) - 1:
res += ","
res += ")"
return res
numba_args = ()
numba_layout = ()
for i, a in enumerate(args):
from hysop.backend.device.opencl import clArray
if isinstance(a, Array):
a = a.handle
if isinstance(a, clArray.Array):
# some opencl arrays can be mapped to host
if raise_on_cl_array:
msg = "Numba signature: Got a cl.Array or hysop.OpenClArray for argument {} (shape={}, dtype={})."
msg = msg.format(i, a.shape, a.dtype)
raise ValueError(msg)
assert a.dtype.type in dtype_to_ntype, a.dtype.type
dtype = dtype_to_ntype[a.dtype.type]
if a.flags.c_contiguous:
na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="C")
elif a.flags.f_contiguous:
na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="F")
else:
na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="A")
numba_layout += (format_shape(*a.shape),)
elif isinstance(a, np.ndarray):
assert a.dtype.type in dtype_to_ntype, a.dtype.type
dtype = dtype_to_ntype[a.dtype.type]
if a.flags.c_contiguous:
na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="C")
elif a.flags.f_contiguous:
na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="F")
else:
na = nb.types.Array(dtype=dtype, ndim=a.ndim, layout="A")
numba_layout += (format_shape(*a.shape),)
elif isinstance(a, np.dtype):
a = dtype_to_ntype[a.type]
numba_layout += ("()",)
elif isinstance(a, type):
na = dtype_to_ntype[a]
numba_layout += ("()",)
elif type(a) in dtype_to_ntype:
na = dtype_to_ntype[type(a)]
numba_layout += ("()",)
else:
msg = f"Uknown argument type {type(a).__mro__}."
raise NotImplementedError(msg)
numba_args += (na,)
return nb.void(*numba_args), ",".join(numba_layout)
[docs]
def bake_numba_copy(dst, src, target=None):
if target is None:
target = __DEFAULT_NUMBA_TARGET__
signature, layout = make_numba_signature(dst, src)
if dst.ndim == 1:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def copy(dst, src):
for i in range(0, dst.shape[0]):
dst[i] = src[i]
elif dst.ndim == 2:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def copy(dst, src):
for i in prange(0, dst.shape[0]):
for j in range(0, dst.shape[1]):
dst[i, j] = src[i, j]
elif dst.ndim == 3:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def copy(dst, src):
for i in prange(0, dst.shape[0]):
for j in prange(0, dst.shape[1]):
for k in range(0, dst.shape[2]):
dst[i, j, k] = src[i, j, k]
elif dst.ndim == 4:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def copy(dst, src):
for i in prange(0, dst.shape[0]):
for j in prange(0, dst.shape[1]):
for k in prange(0, dst.shape[2]):
for l in range(0, dst.shape[3]):
dst[i, j, k, l] = src[i, j, k, l]
else:
raise NotImplementedError(dst.ndim)
def _exec_copy(copy=copy, dst=dst, src=src):
copy(dst, src)
return _exec_copy
[docs]
def bake_numba_accumulate(dst, src, target=None):
if target is None:
target = __DEFAULT_NUMBA_TARGET__
signature, layout = make_numba_signature(dst, src)
if dst.ndim == 1:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def accumulate(dst, src):
for i in range(0, dst.shape[0]):
dst[i] += src[i]
elif dst.ndim == 2:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def accumulate(dst, src):
for i in prange(0, dst.shape[0]):
for j in range(0, dst.shape[1]):
dst[i, j] += src[i, j]
elif dst.ndim == 3:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def accumulate(dst, src):
for i in prange(0, dst.shape[0]):
for j in prange(0, dst.shape[1]):
for k in range(0, dst.shape[2]):
dst[i, j, k] += src[i, j, k]
elif dst.ndim == 4:
@nb.guvectorize([signature], layout, target=target, nopython=True, cache=True)
def accumulate(dst, src):
for i in prange(0, dst.shape[0]):
for j in prange(0, dst.shape[1]):
for k in prange(0, dst.shape[2]):
for l in range(0, dst.shape[3]):
dst[i, j, k, l] += src[i, j, k, l]
else:
raise NotImplementedError(dst.ndim)
def _exec_accumulate(accumulate=accumulate, dst=dst, src=src):
accumulate(dst, src)
return _exec_accumulate
[docs]
def bake_numba_transpose(src, dst, axes, target=None):
# inefficient permutations
if target is None:
target = __DEFAULT_NUMBA_TARGET__
signature, layout = make_numba_signature(dst, src)
assert src.ndim == dst.ndim
assert dst.shape == tuple(src.shape[i] for i in axes)
assert dst.dtype == src.dtype
ndim = src.ndim
def noop(dst, src):
pass
if ndim == 1:
transpose = noop
elif ndim == 2:
if axes == (0, 1):
transpose == noop
elif axes == (1, 0):
@nb.guvectorize(
[signature], layout, target=target, nopython=True, cache=True
)
def transpose(dst, src):
for i in prange(0, src.shape[0]):
for j in range(0, src.shape[1]):
dst[j, i] = src[i, j]
else:
raise NotImplementedError
elif ndim == 3:
if axes == (0, 1, 2):
transpose == noop
elif axes == (0, 2, 1):
@nb.guvectorize(
[signature], layout, target=target, nopython=True, cache=True
)
def transpose(dst, src):
for i in prange(0, src.shape[0]):
for j in prange(0, src.shape[1]):
for k in range(0, src.shape[2]):
dst[i, k, j] = src[i, j, k]
elif axes == (1, 0, 2):
@nb.guvectorize(
[signature], layout, target=target, nopython=True, cache=True
)
def transpose(dst, src):
for i in prange(0, src.shape[0]):
for j in prange(0, src.shape[1]):
for k in range(0, src.shape[2]):
dst[j, i, k] = src[i, j, k]
elif axes == (1, 2, 0):
@nb.guvectorize(
[signature], layout, target=target, nopython=True, cache=True
)
def transpose(dst, src):
for i in prange(0, src.shape[0]):
for j in prange(0, src.shape[1]):
for k in range(0, src.shape[2]):
dst[j, k, i] = src[i, j, k]
elif axes == (2, 1, 0):
@nb.guvectorize(
[signature], layout, target=target, nopython=True, cache=True
)
def transpose(dst, src):
for i in prange(0, src.shape[0]):
for j in prange(0, src.shape[1]):
for k in range(0, src.shape[2]):
dst[k, j, i] = src[i, j, k]
elif axes == (2, 0, 1):
@nb.guvectorize(
[signature], layout, target=target, nopython=True, cache=True
)
def transpose(dst, src):
for i in prange(0, src.shape[0]):
for j in prange(0, src.shape[1]):
for k in range(0, src.shape[2]):
dst[k, i, j] = src[i, j, k]
else:
raise NotImplementedError(axes)
else:
raise NotImplementedError(ndim)
def _exec_transpose(transpose=transpose, dst=dst, src=src):
transpose(dst, src)
return _exec_transpose